# -*- coding: utf-8 -*-
# @Date    : 2021/4/28
# @Author  : Maoxian
import time

import requests
from lxml import etree
from PIL import Image
import pytesseract

# Endpoints on so.gushiwen.cn: the login form and the captcha image generator.
login_url = (
    "https://so.gushiwen.cn"
    "/user/login.aspx"
)
code_url = (
    "https://so.gushiwen.cn"
    "/RandCode.ashx"
)

# Headers sent with every request. The User-Agent is assembled from its
# space-separated product tokens.
headers = {
    "User-Agent": " ".join((
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
        "AppleWebKit/537.36 (KHTML, like Gecko)",
        "Chrome/90.0.4430.85",
        "Safari/537.36",
        "Edg/90.0.818.49",
    )),
}


def code_ocr(img_path, threshold=150):
    """Recognise the text of a captcha image using Tesseract OCR.

    The image is converted to greyscale and then binarised against
    ``threshold`` before being handed to ``pytesseract``.

    References:
        https://blog.csdn.net/qq_38161040/article/details/90668765
        Tesseract-OCR download: https://github.com/UB-Mannheim/tesseract/wiki
        Recognition tuning: https://www.programmersought.com/article/5349637435/

    Args:
        img_path: Path of the captcha image file to read.
        threshold: Greyscale cut-off in the range 0-255. Levels below it map
            to 0 (black) and all other levels map to 1 (white).

    Returns:
        The recognised text with leading and trailing whitespace removed.
    """
    # Lookup table with one entry per greyscale level (0-255).
    table = [0 if level < threshold else 1 for level in range(256)]

    # Image.open keeps the underlying file open; the context manager closes
    # it. convert() returns a separate image, so it stays usable afterwards.
    with Image.open(img_path) as source:
        grey = source.convert('L')

    binary = grey.point(table, "1")  # map through the table into a 1-bit image
    # binary.show()  # uncomment to inspect the pre-processed image
    return pytesseract.image_to_string(binary).strip()


def login(email, pwd, timeout=10):
    """Attempt one login to so.gushiwen.cn, solving the captcha with OCR.

    A fresh session downloads the captcha image, saves it as ``code.png`` in
    the current working directory, runs ``code_ocr`` on it, and posts the
    login form using the same session.

    Args:
        email: Account e-mail address.
        pwd: Account password.
        timeout: Seconds to wait for each HTTP request before requests raises
            a timeout error, so a stalled connection cannot hang forever.

    Returns:
        The ``requests.Response`` of the login POST. The caller has to inspect
        the page to decide whether the login succeeded.
    """
    # The context manager closes the session's connections, so repeated
    # login attempts do not accumulate open connection pools.
    with requests.Session() as rs:
        code_img = rs.get(code_url, headers=headers, timeout=timeout).content
        with open('code.png', 'wb') as f:
            f.write(code_img)

        code = code_ocr("code.png")
        # NOTE(review): __VIEWSTATE / __VIEWSTATEGENERATOR are hard-coded
        # values; presumably captured from the login page - confirm they do
        # not need to be re-read from the page on each attempt.
        data = {
            "__VIEWSTATE": "lfY83CnGj/MvyaMOIu9zt/diHa6AOMUWtVoYXkbhczxoICxVNA9BDssZ4jru+r+ZRdYwZ2G79zpHjx2+QJvmNDCsrtAC/s+lFXgYrJeaBFrgyTUwumV7uUKRRDc=",
            "__VIEWSTATEGENERATOR": "C93BE1AE",
            "from": "http://so.gushiwen.cn/user/collect.aspx",
            "email": email,
            "pwd": pwd,
            "code": code,
            "denglu": "登录"
        }
        # The response body is fully read before the session closes, so the
        # returned object remains usable.
        return rs.post(login_url, headers=headers, data=data, timeout=timeout)


if __name__ == '__main__':
    # Keep retrying until a login attempt succeeds; each attempt fetches a
    # new captcha, so a failed OCR read is simply retried.
    # NOTE(review): the credentials below are hard-coded in source; consider
    # reading them from the environment or a prompt instead.
    is_login = False  # becomes True once the logged-in page is detected
    while not is_login:
        response = login('imaoxian@qq.com', 'abcd123456')
        html = etree.HTML(response.text)
        # etree.HTML may return None when the body has no parseable content;
        # treat that the same as "marker not found".
        if html is None:
            labels = []
        else:
            labels = html.xpath('//div[@class="mainshoucang"]/span[1]/text()')

        # After a successful login the top-left corner shows "我的收藏".
        if labels and labels[0] == "我的收藏":
            is_login = True
            print('登录成功')
            with open('index.html', 'w', encoding='utf8') as f:
                f.write(response.text)
        else:
            # Covers both a missing marker and a marker with different text,
            # so every failed attempt is reported and followed by a delay.
            print('登录失败，正在重试...')
            time.sleep(1)
